!! Copyright (C) 2009,2010,2011,2012  Marco Restelli
!!
!! This file is part of:
!!   FEMilaro -- Finite Element Method toolkit
!!
!! FEMilaro is free software; you can redistribute it and/or modify it
!! under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 3 of the License, or
!! (at your option) any later version.
!!
!! FEMilaro is distributed in the hope that it will be useful, but
!! WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
!! General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with FEMilaro; If not, see <http://www.gnu.org/licenses/>.
!!
!! author: Marco Restelli                   <marco.restelli@gmail.com>

!>\brief
!! This module provides some subroutines for the computation of
!! eigenvalues and eigenvectors.
!!
!! \n
!!
!! The subroutines implemented here are a fortran translation of the
!! GSL subroutine implemented in <tt>eigen/nonsymm.c</tt>. For further
!! details, see the <a href="http://www.gnu.org/software/gsl/">GSL
!! library</a>.
!<----------------------------------------------------------------------
module mod_eigen

!-----------------------------------------------------------------------

 use mod_messages, only: &
   mod_messages_initialized, &
   error,   &
   warning, &
   info

 use mod_kinds, only: &
   mod_kinds_initialized, &
   wp

!-----------------------------------------------------------------------
 
 implicit none

!-----------------------------------------------------------------------

! Module interface

 public :: &
   mod_eigen_constructor, &
   mod_eigen_destructor,  &
   mod_eigen_initialized, &
   eigen_nonsymm

 private

!-----------------------------------------------------------------------

! Module types and parameters

 ! private members
 !intrinsic :: norm2, hypot ! these are indeed in the f2008 standard
 intrinsic :: hypot ! workaround for FERMI
 !> \defgroup gsl_machine_constants GSL machine constants
 !! For the following constants, see <tt>gsl_machine.h</tt>.
 !! @{
 real(wp), parameter :: dbl_min     = tiny(1.0_wp)
 real(wp), parameter :: dbl_max     = huge(1.0_wp)
 real(wp), parameter :: dbl_epsilon = epsilon(1.0_wp)
 !! @}

! Module variables

 ! public members
 logical, protected ::               &
   mod_eigen_initialized = .false.
 ! private members
 character(len=*), parameter :: &
   this_mod_name = 'mod_eigen'

!-----------------------------------------------------------------------

contains

!-----------------------------------------------------------------------

 subroutine mod_eigen_constructor()
  character(len=*), parameter :: &
    this_sub_name = 'constructor'

   !Consistency checks ---------------------------
   if( (mod_messages_initialized.eqv..false.) .or. &
          (mod_kinds_initialized.eqv..false.) ) then
     call error(this_sub_name,this_mod_name, &
                'Not all the required modules are initialized.')
   endif
   if(mod_eigen_initialized.eqv..true.) then
     call warning(this_sub_name,this_mod_name, &
                  'Module is already initialized.')
   endif
   !----------------------------------------------

   mod_eigen_initialized = .true.
 end subroutine mod_eigen_constructor

!-----------------------------------------------------------------------
 
 subroutine mod_eigen_destructor()
  character(len=*), parameter :: &
    this_sub_name = 'destructor'
   
   !Consistency checks ---------------------------
   if(mod_eigen_initialized.eqv..false.) then
     call error(this_sub_name,this_mod_name, &
                'This module is not initialized.')
   endif
   !----------------------------------------------

   mod_eigen_initialized = .false.
 end subroutine mod_eigen_destructor

!-----------------------------------------------------------------------
 
 !> Compute the eigenvalues and eigenvectors of a generic real matrix.
 !!
 !! This is a frotran translation of the GSL function
 !! <tt>gsl_eigen_nonsymmv</tt>. The algorithm works as follows.
 !! <ul>
 !!  <li> The matrix is reduced in Hessenberg form. This is done in
 !!  two steps: first the Hessenberg vectors are computed, than the
 !!  complete Hessenberg matrix is assembled. This is a translation of
 !!  the GSL subroutines <tt>gsl_linalg_hessenberg_decomp</tt> and
 !!  <tt>gsl_linalg_hessenberg_unpack</tt>.
 !! </ul>
 pure subroutine eigen_nonsymm(a,n_evals,eval_r,eval_i,evec_r,evec_i)
  real(wp), intent(inout) :: a(:,:)
  integer, intent(out) :: n_evals !< number of computed eigenvalues
  real(wp), intent(out) :: eval_r(:), eval_i(:) !< eigenvalues
  !> eigenvectors
  real(wp), intent(out), optional :: evec_r(:,:), evec_i(:,:)

  integer :: i, n1, n2, n_iter
  real(wp) :: scale
  real(wp) :: tau(size(a,1)) !< Hessenberg coefficients
  real(wp) :: u(size(a,1),size(a,1)) !< transformation matrix
  character(len=*), parameter :: &
    this_sub_name = 'eigen_nonsymm'
 
   ! 1) An optional call to gsl_linalg_balance_matrix could be
   ! inserted here. If the matrix is balanced, then afterwards the
   ! eigenvectors must be corrected accordingly.

   ! 2) Compute the Hessemberg reduction of A
   ! Note: after these two calls
   ! a -> Hessenberg form of the original matrix in the upper part and
   !      Hessenberg vectors in the lower part
   ! tau -> Hessenberg coefficients
   ! u -> the transformation matrix
   call hessenberg_decomp(a,tau)
   call hessenberg_unpack(u,a,tau)

   ! Before we can call eigen_francis we need to zero (at least) some
   ! elements of the lower part of a, because those zeros will enter
   ! the computations. Indeed, the algorithm used in eigen_francis
   ! (and specifcally in francis_qrstep) temporary destroys the
   ! Hessenberg form using also the second and the third subdiagonals.
   ! Such subdiagonals must thus contain the correct values, i.e. 0.
   do i=3,size(a,1); a(i,i-2) = 0.0_wp; enddo ! second diagonal
   do i=4,size(a,1); a(i,i-3) = 0.0_wp; enddo ! third diagonal

   ! 3) Compute the eigenvalues
   n1 = 1; n2 = size(a,1)
   n_iter = 0; n_evals = 0
   call eigen_francis( a,u , n1,n2 , n_iter , n_evals,eval_r,eval_i)

   ! 4) Compute the eigenvectors
   if((present(evec_r)).and.(n_evals.eq.size(a,1))) then
     call get_right_eigenvectors(a,u,eval_r,eval_i,evec_r,evec_i)
     ! normalization
     do i=1,size(a,1)
       !scale = norm2(hypot(evec_r(:,i),evec_i(:,i)))
       ! workaround for FERMI
           scale = sqrt(sum(hypot(evec_r(:,i),evec_i(:,i))**2))
       if(scale.ne.0.0_wp) then
         evec_r(:,i) = evec_r(:,i)/scale
         evec_i(:,i) = evec_i(:,i)/scale
       endif
     enddo
   endif

   ! 5) Correct the eigenvectors if the matrix has been balanced by
   ! calling gsl_linalg_balance_accum

 end subroutine eigen_nonsymm
 
!-----------------------------------------------------------------------
 
 !> Francis QR method for the computation of the eigenvalues of an
 !! Hessenberg real matrix.
 !!
 !! Typically, the Hessenberg matrix was obtained by a generic square
 !! real matrix, so that we have
 !! \f{displaymath}{
 !!  H = U^TAU, \qquad T=Q^THQ,
 !! \f}
 !! and the Schur decomposition of the original matrix
 !! \f{displaymath}{
 !!  A = (UQ)T(UQ)^T.
 !! \f}
 !! The workflow of this function is as follows.
 !! <ul>
 !!  <li> Make a QR iteration with shift.
 !!  <li> If the last (or the first) subdiagonal element is zero, then
 !!  we have found an eigenvalue: store it and shrink the active
 !!  matrix.
 !!  <li> If there is an isolated two-by-two block at the right,
 !!  bottom corner (or at the left top one), we have found two
 !!  eigenvalues: store them and shrink the active matrix.
 !!  <li> If the matrix can be reduced, split it into two blocks and
 !!  apply the QR algorithm recursively to each block.
 !! </ul>
 !!
 !! A note is required about the active matrix. This is a block placed
 !! on the main diagonal which shrinks as new eigenvalues are found,
 !! and it is the only one which is relevant for the computation of
 !! the eigenvalues. However, the portion of the matrix which is not
 !! active can not be completely neglected for two reasons. First of
 !! all, the transformations applied to the active region must be
 !! applied also to the inactive one. Then, the transformations
 !! applied to the active region must be accumulated in the overall
 !! transformation matrix \f$Q\f$. These aspects must be taken care of
 !! whenever a transformation matrix is applied, which means the two
 !! subroutines \c francis_qrstep and \c francis_schur_standardize.
 !!
 !! A second note concerns the recursive implementation of \c
 !! eigen_francis, which allows to take into account the case when the
 !! matrix is split into two submatrices. This implies that some
 !! arguments which are passed down through the recursive calls have
 !! to be initialized before calling this subroutine. Notice also
 !! that, as the active matrix shrinks during the recursive calls, the
 !! complete matrix must still be passed around for the reasons
 !! described previously.
 !!
 !! \warning The algorithm used for the QR iterations uses also the
 !! second and third subdiagonals of \c h. For this reason, it is
 !! necessary to set (at least) such diagonals to 0 in \c h before
 !! calling this function. Lower diagonals are not used and can store
 !! any value, which is not altered during the computations.
 pure recursive subroutine eigen_francis( h , u , n1,n2 , &
                            n_iter , n_evals,eval_r,eval_i)
  real(wp), intent(inout) :: h(:,:) !< Hessenberg matrix
  real(wp), intent(inout) :: u(:,:) !< transformation matrix
  ! !> \defgroup eigen_francis_inout_args <tt>inout</tt> arguments
  ! !! The following arguments must be initialized before calling.
  ! !! @{
  integer, intent(inout) :: n1, n2 !< indexes of the active region
  !> Count the iterations from the last found eigenvalue: it must be
  !! initialized to 0 and it is reset to 0 each time a new eigenvalue
  !! is found.
  integer, intent(inout) :: n_iter
  !> Count the found eigenvalues: in practice, this is used to fill
  !! <tt>eval_r</tt> and <tt>eval_i</tt> in the proper order. It must
  !! be initialized to 0. Notice that during the recursive calls the
  !! size of the matrix \c h becomes smaller, while <tt>eval_r</tt>
  !! and <tt>eval_i</tt> are passed entirely and indexed with \c
  !! n_evals.
  integer, intent(inout) :: n_evals
  ! !! @}
  real(wp), intent(inout) :: eval_r(:) !< eigenvalues (real)
  real(wp), intent(inout) :: eval_i(:) !< eigenvalues (imaginary)

  integer, parameter :: max_iter = 1000
  integer :: nd, q, n1r, n2r
  real(wp) :: l_r(2), l_i(2)
 
  character(len=*), parameter :: &
    this_sub_name = 'eigen_francis'

   ! Set the active matrix boundaries
   nd = n2-n1+1 ! dimension of the active matrix

   ! QR iterations
   qr_main_do: do
    if( (nd.le.2).or.(n_iter.gt.max_iter) ) exit qr_main_do

     n_iter = n_iter+1

     call francis_search_subdiagonal_small_elements( q , h(n1:n2,n1:n2) )

     found_something_if: if(q.eq.0) then
       ! no small subdiagonal element found - perform a QR sweep on
       ! the active reduced Hessenberg matrix
       call francis_qrstep(h,u , n1,n2 , n_iter)
     else

       ! Note: we don't use a case switch because the alternatives are
       ! not mutually exclusive
       if(q.eq.nd) then
         ! the last subdiagonal element of the matrix is 0 -> h(n2,n2)
         ! is a real eigenvalue
         n_evals = n_evals+1
         eval_r(n_evals) = h(n2,n2); eval_i(n_evals) = 0.0_wp
         ! reset the iteration counter
         n_iter = 0
         ! reduce the active matrix dropping the last line
         n2 = n2-1
         nd = nd-1

       elseif(q.eq.nd-1) then
         ! the bottom right 2-by-2 block of h is an eigenvalue system
         call francis_schur_standardize(h,u , l_r,l_i , n2-1)
         n_evals = n_evals+1
         eval_r(n_evals) = l_r(1); eval_i(n_evals) = l_i(1)
         n_evals = n_evals+1
         eval_r(n_evals) = l_r(2); eval_i(n_evals) = l_i(2)
         ! reset the iteration counter
         n_iter = 0
         ! reduce the active matrix dropping the last line
         n2 = n2-2
         nd = nd-2

       elseif(q.eq.2) then
         ! the first matrix element is an eigenvalue
         n_evals = n_evals+1
         eval_r(n_evals) = h(n1,n1); eval_i(n_evals) = 0.0_wp
         ! reset the iteration counter
         n_iter = 0
         ! reduce the active matrix dropping the last line
         n1 = n1+1
         nd = nd-1

       elseif(q.eq.3) then
         ! the upper left 2-by-2 block is an eigenvalue system
         call francis_schur_standardize(h,u , l_r,l_i , n1)
         n_evals = n_evals+1
         eval_r(n_evals) = l_r(1); eval_i(n_evals) = l_i(1)
         n_evals = n_evals+1
         eval_r(n_evals) = l_r(2); eval_i(n_evals) = l_i(2)
         ! reset the iteration counter
         n_iter = 0
         ! reduce the active matrix dropping the last line
         n1 = n1+2
         nd = nd-2

       else
         ! There is a zero element on the subdiagonal somewhere in the
         ! middle of the matrix - we can now operate separately on the
         ! two submatrices split by this element. q is the row index
         ! of the zero element.

         ! operate on lower right block
         n1r = q ; n2r = n2
         call eigen_francis(h,u,n1r,n2r,n_iter,n_evals,eval_r,eval_i)

         ! operate on upper left block
         n1r = n1 ; n2r = q-1
         call eigen_francis(h,u,n1r,n2r,n_iter,n_evals,eval_r,eval_i)

         ! when the recursive calls return the work is done
         nd = 0

       endif

     endif found_something_if

   enddo qr_main_do

   ! handle special cases of nd = 1 or 2
   if(nd.eq.1) then
     ! h(n1,n1) is a real eigenvalue
     n_evals = n_evals+1
     eval_r(n_evals) = h(n1,n1); eval_i(n_evals) = 0.0_wp
     ! reset the iteration counter
     n_iter = 0
   elseif(nd.eq.2) then
     ! h(n1:n2,n1:n2) is a 2-by-2 eigenvalue system
     call francis_schur_standardize(h,u , l_r,l_i , n1)
     n_evals = n_evals+1
     eval_r(n_evals) = l_r(1); eval_i(n_evals) = l_i(1)
     n_evals = n_evals+1
     eval_r(n_evals) = l_r(2); eval_i(n_evals) = l_i(2)
     ! reset the iteration counter
     n_iter = 0
   endif
 
 contains

  ! Locate a small subdiagonal element.
  !
  ! Notice that:
  ! 1) the search starts from the bottom right corner
  ! 2) the small matrix element is set to zero
  ! 3) we assume that size(a,1) is at least 3
  ! 4) the returned value is  2 <= q <= size(a,1)  if a small element
  !   has been found, 0 otherwise
  pure subroutine francis_search_subdiagonal_small_elements(q,a)
   real(wp), intent(inout) :: a(:,:)
   integer, intent(out) :: q

   integer :: i
   real(wp) :: adp, ad, as

    adp = a(size(a,1)-1,size(a,2)-1) ! neighbour diagonal element

    do i=size(a,1),2,-1
      ad = a(i, i ) ! diagonal element
      as = a(i,i-1) ! subdiagonal element
      if(               (as.eq.0.0_wp) .or.           &
          (abs(as).lt.dbl_epsilon*(abs(ad)+abs(adp))) ) then
        a(i,i-1) = 0.0_wp
        q = i
        return
      endif
      adp = ad
    enddo

    ! if we get here we have not found any small element
    q = 0
  end subroutine francis_search_subdiagonal_small_elements

  !> Implement one QR sweep
  !! 
  !! This is a translation of \c francis_qrstep in
  !! <tt>eigen/francis.c</tt>. The algorithm includes: choosing the
  !! shift, compute the Householder matrix, applying it to the left and
  !! to the right of \c h and accumulating it in the complete
  !! transformation matrix. Notice that the Householder matrix is
  !! computed by looking only at the active matrix, however it is
  !! applied to the complete matrix h and transformation u. In fact,
  !! together with \c francis_schur_standardize, this is one of the
  !! two subroutines where the transformations are applied and
  !! accumulated.
  pure subroutine francis_qrstep(h,u , n1,n2 , niter)
   integer, intent(in) :: n1, n2 !< limits of the active matrix
   integer, intent(in) :: niter !< iteration counter
   real(wp), intent(inout) :: h(:,:), u(:,:)

   real(wp), parameter :: gsl_francis_coeff1 = 0.75_wp
   real(wp), parameter :: gsl_francis_coeff2 = -0.4375_wp
   integer :: j, nd, ii1, ii2, jj1, k
   real(wp) :: s, h_nn, h_nm1nm1, h_cross, disc, ave, h_tmp1, h_tmp2, &
     dat(3), scale
   real(wp) :: v_j(3), tau_j, wj

    ! Compute the shift
    shift_def_if: if(mod(niter,10).eq.0) then
      ! Exceptional shifts: we have gone 10 iterations without finding
      ! a new eigenvalue, try a new choice of shifts. See LAPACK
      ! routine DLAHQR.
      s = abs(h(n2,n2-1)) + abs(h(n2-1,n2-2))
      h_nn = h(n2,n2) + gsl_francis_coeff1*s
      h_nm1nm1 = h_nn
      h_cross = gsl_francis_coeff2*s*s
    else
      ! normal shifts - compute Rayleigh quotient and use Wilkinson
      ! shift if possible
      h_nn     = h(  n2,n2  )
      h_nm1nm1 = h(n2-1,n2-1)
      h_cross  = h(n2,n2-1) * h(n2-1,n2)
      disc = 0.5_wp*(h_nm1nm1-h_nn)
      disc = disc*disc + h_cross
      if(disc.gt.0.0_wp) then
        ! real roots - use Wilkinson's shift twice
        disc = sqrt(disc)
        ave = 0.5_wp*(h_nm1nm1+h_nn)
        if((abs(h_nm1nm1)-abs(h_nn)).gt.0.0_wp) then
          h_nm1nm1 = h_nm1nm1 * h_nn - h_cross
          h_nn = h_nm1nm1/(disc*sign(1.0_wp,ave)+ave)
        else
          h_nn = disc*sign(1.0_wp,ave)+ave
        endif
        h_nm1nm1 = h_nn
        h_cross = 0.0_wp
      endif
    endif shift_def_if
    h_tmp1 = h_nm1nm1 - h(n1,n1)
    h_tmp2 = h_nn     - h(n1,n1)
    ! These formulas are equivalent to those in Golub, Van Loan for
    ! the normal shift case: see the definition of x,y,z in the
    ! computation of Me1, section 7.5 - the terms have been rearranged
    ! to reduce possible roundoff error when subdiagonal elements are
    ! small.
    dat(1) = (h_tmp1*h_tmp2-h_cross)/h(n1+1,n1) + h(n1,n1+1)
    dat(2) = h(n1+1,n1+1) - h(n1,n1) - h_tmp1 - h_tmp2
    dat(3) = h(n1+2,n1+1)
    scale = sum(abs(dat))
    if(scale.ne.0.0_wp) dat = dat/scale

    ! The Householder transformation is now computed with respect to
    ! the vector dat. The present value of dat includes the double
    ! shift, according to the Francis QR algorithm. Notice that the
    ! before-last column will be handled outside the loop. Notice also
    ! that, since the first Householder matrix P0 is computed
    ! considering two complex shifts, it does not preserve the
    ! Hessenberg structure and an additional diagonal appears. The
    ! following Householder matrices will restore the Hessenberg
    ! structure and to do so they will have a 3-by-3 block structure,
    ! rather than the usual 2-by-2 one.
    nd = n2-n1+1
    column_do: do j=1,nd-2
      
      ! Compute the Householder vector
      call get_householder_vector( tau_j , v_j , dat )

      tau_j_if: if(tau_j.ne.0.0_wp) then ! otherwise nothing to do

        ! the first iteration works on (as for premultiplication)
        !   h(   n1  : n1+3  ,   n1  : )
        ! the second iteration works on
        !   h(  n1+1 : n1+4  ,   n1  : )
        ! the following iterations work on
        !   h( n1+j-1:n1+j+2 , n1+j-2: )
        ! We can summarize these indexes as follows
        ii1 =     n1+j-1;  ii2 = ii1+2
        jj1 = max(n1+j-2,n1) ! max activated during the first iteration
        do k=jj1,size(h,2) ! premultiplication
          ! Warning: v_j(j+1) does not hold the correct value, which
          ! is always 1 (see the comments in get_householder_vector)
          wj = h(ii1,k) + dot_product(v_j(2:),h(ii1+1:ii2,k))
          h(   ii1   ,k) = h(   ii1   ,k) - tau_j * wj
          h(ii1+1:ii2,k) = h(ii1+1:ii2,k) - tau_j * wj * v_j(2:)
        enddo
        ! In the postmultiplication we have to remember that the
        ! values of h are correct only until the third subdiagonal,
        ! i.e. row n1-1+j+3. This is all what is required for the
        ! computations. However, we have to make sure that we do not
        ! "pick up" anything from h from below the third subdiagonal:
        ! this motivates the nontrivial upper bound for k in the
        ! following loop. Also, notice that we stop at n2: in
        ! principle one could continue until size(h,1), since those
        ! lines are zero; this however would "pollute" with numerical
        ! noise the zeros introduced by
        ! francis_search_subdiagonal_small_elements, which are used to
        ! distinguish among real and complex eigenvalues.
        do k=1,min(n1-1+j+3,n2) ! postmultiplication
          wj = h(k,ii1) + dot_product(v_j(2:),h(k,ii1+1:ii2))
          h(k,   ii1   ) = h(k,   ii1   ) - tau_j * wj
          h(k,ii1+1:ii2) = h(k,ii1+1:ii2) - tau_j * wj * v_j(2:)
        enddo

        ! accumulate the Householder transformation
        do k=1,size(u,1) ! postmultiplication
          wj = u(k,ii1) + dot_product(v_j(2:),u(k,ii1+1:ii2))
          u(k,   ii1   ) = u(k,   ii1   ) - tau_j * wj
          u(k,ii1+1:ii2) = u(k,ii1+1:ii2) - tau_j * wj * v_j(2:)
        enddo

      endif tau_j_if

      ! Set dat for the next column
      dat(1) = h(n1+j  ,n1+j-1)
      dat(2) = h(n1+j+1,n1+j-1)
      if(j.ne.nd-2) dat(3) = h(n1+j+2,n1+j-1)
      scale = sum(abs(dat))
      if(scale.ne.0.0_wp) dat = dat/scale

    enddo column_do

    ! last Householder matrix, which is only 2-by-2
    scale = sum(abs(dat(1:2)))
    if(scale.ne.0.0_wp) dat(1:2) = dat(1:2)/scale
    call get_householder_vector( tau_j , v_j(1:2) , dat(1:2) )
    ii1 = n2-1
    jj1 = n2-2
    do k=jj1,size(h,2) ! premultiplication
      wj = h(ii1,k) + v_j(2)*h(n2,k)
      h(ii1,k) = h(ii1,k) - tau_j * wj
      h( n2,k) = h( n2,k) - tau_j * wj * v_j(2)
    enddo
    do k=1,n2 ! postmultiplication
      wj = h(k,ii1) + v_j(2)*h(k,n2)
      h(k,ii1) = h(k,ii1) - tau_j * wj
      h(k, n2) = h(k, n2) - tau_j * wj * v_j(2)
    enddo

    ! accumulate the Householder transformation
    do k=1,size(u,1) ! postmultiplication
      wj = u(k,ii1) + v_j(2)*u(k,n2)
      u(k,ii1) = u(k,ii1) - tau_j * wj
      u(k, n2) = u(k, n2) - tau_j * wj * v_j(2)
    enddo

  end subroutine francis_qrstep

  !> Compute the two complex eigenvalues of a 2-by-2 block
  !!
  !! The computation of the two eigenvalues requires a similarity
  !! transformation. Besides acting on the 2-by-2 block, this
  !! transformation has two effects: it changes the whole "row" and
  !! "column" of the matrix corresponding to the 2-by-2 block and it
  !! must be accumulated in the overall orthogonal matrix. This is the
  !! reason why the whole matrix \c h must be passed, and not only the
  !! 2-by-2 block, as well as the whole transformation matrix. For
  !! additional details see \c \c francis_schur_standardize in
  !! <tt>eigen/francis.c</tt>.
  pure subroutine francis_schur_standardize(h,u , l_r,l_i , ib)
   integer, intent(in) :: ib !< upper right index of the 2-by-2 block
   real(wp), intent(inout) :: h(:,:), u(:,:)
   real(wp), intent(out) :: l_r(:), l_i(:)

   real(wp) :: r(2,2) ! rotation matrix

    ! reduce to standard form
    call francis_standard_form( h(ib:ib+1,ib:ib+1) ,r)

    ! compute the eigenvalues
    l_r(1) = h(ib,ib); l_r(2) = h(ib+1,ib+1) ! real part
    if(h(ib+1,ib).eq.0.0_wp) then ! real eigenvalues
      l_i = 0.0_wp
    else
      l_i(1) = sqrt( abs( h(ib+1,ib)*h(ib,ib+1) ) )
      l_i(2) = -l_i(1)
    endif

    ! apply r to the whole matrix h
    if(ib.lt.size(h,1)-1) & ! transform two rows of h
      h(ib:ib+1,ib+2:) = matmul( transpose(r) , h(ib:ib+1,ib+2:) )
    if(ib.gt.1)           & ! transform two columns of h
      h(:ib-1,ib:ib+1) = matmul( h(:ib-1,ib:ib+1) , r )
     
    ! accumulate the transformation r in the global matrix u
    u(:,ib:ib+1) = matmul( u(:,ib:ib+1) , r )

  end subroutine francis_schur_standardize

  !> Reduce a 2-by-2 real matrix in standard form: the transformation
  !! is done by means of a rotation matrix and the transformed matrix
  !! is either upper triangular or such that a=d and b*d<0. The output
  !! argument \c r is such that
  !! \code
  !!  m = matmul( transpose(r) , matmul(m,r) )
  !! \endcode
  !! For further details, see \c francis_standard_form in
  !! <tt>eigen/francis.c</tt>.
  pure subroutine francis_standard_form(m,r)
   real(wp), intent(inout) :: m(:,:) !< matrix
   real(wp), intent(out)   :: r(:,:) !< rotation matrix

   real(wp) :: a, b, c, d, tmp, p, bcmax, bcmis, scale, z, tau, &
     sigma, aa, bb, cc, dd, sab, sac, cs1, sn1
   real(wp) :: cs, sn

    a = m(1,1) ; b = m(1,2)
    c = m(2,1) ; d = m(2,2)

    if(c.eq.0.0_wp) then
      ! matrix is already upper triangular - set rotation matrix to
      ! the identity
      r = reshape( (/ 1.0_wp , 0.0_wp ,    &
                      0.0_wp , 1.0_wp /) , &
                 (/2,2/) , order=(/2,1/) )
    elseif(b.eq.0.0_wp) then
      ! swap rows and columns to make it upper triangular
      r = reshape( (/ 0.0_wp , 1.0_wp ,    &
                      1.0_wp , 0.0_wp /) , &
                 (/2,2/) , order=(/2,1/) )
      tmp = d; d = a; a = tmp ! swap a and d
      b = -c;  c = 0.0_wp     ! swap b and c
    elseif( ((a-d).eq.0.0_wp).and.(sign(1.0_wp,b*c).lt.0.0_wp) ) then
      ! the matrix has complex eigenvalues and is already in standard
      ! form: nothing to do
      r = reshape( (/ 1.0_wp , 0.0_wp ,    &
                      0.0_wp , 1.0_wp /) , &
                 (/2,2/) , order=(/2,1/) )
    else
      ! the matrix need some nontrivial processing
      tmp = a-d
      p = 0.5_wp*tmp
      bcmax = max(abs(b),abs(c))
      bcmis = sign( min(abs(b),abs(c)) , b*c ) ! sign(b*c)*min(|b|,|c|)
      scale = max(abs(p),bcmax)
      ! Having  Delta = (a-d)^2+4bc  we compute  z = Delta/(4*scale)
      z = (p/scale)*p + (bcmax/scale)*bcmis
      if(z.ge.4.0_wp*dbl_epsilon) then
        ! real eigenvalues, compute a and d
        z = p + sign( sqrt(scale)*sqrt(z) , p )
        a = d + z
        d = d - (bcmax/z)*bcmis
        ! compute b and the rotation matrix
        tau = hypot(c,z)
        cs = z/tau
        sn = c/tau
        b = b - c
        c = 0.0_wp
      else
        ! complex eigenvalues, or real (almost) equal eigenvalues -
        ! make diagonal element equal
        sigma = b + c
        tau = hypot(sigma,tmp)
        cs = sqrt( 0.5_wp*(1.0_wp+abs(sigma)/tau) )
        sn = -(p/(tau*cs))*sign(1.0_wp,sigma)
        ! postmultiplication by r
        aa = a*cs + b*sn; bb = -a*sn + b*cs
        cc = c*cs + d*sn; dd = -c*sn + d*cs
        ! premultiplication by r^T
        a =  aa*cs + cc*sn; b =  bb*cs + dd*sn
        c = -aa*sn + cc*cs; d = -bb*sn + dd*cs
        ! make the diagonal elements really equal
        tmp = 0.5_wp*(a+d)
        a = tmp; d = tmp
        ! now some final adjustments
        if(c.ne.0.0_wp) then ! not done yet
          if(b.ne.0.0_wp) then ! swapping is not enough
            if(sign(1.0_wp,b*c).gt.0.0_wp) then
              ! real eigenvalues: reduce to upper triangular form
              sab = sqrt(abs(b))
              sac = sqrt(abs(c))
              p = sign(sab*sac,c)
              tau = 1.0_wp / sqrt(abs(b+c))
              a = tmp + p
              d = tmp -p
              b = b - c
              c = 0.0_wp
              ! update cs and sn
              cs1 = sab * tau
              sn1 = sac * tau
              tmp = cs*cs1 - sn*sn1
              sn  = cs*sn1 + sn*cs1
              cs  = tmp
            endif
            ! The "else" branch would be for complex eigenvalues, and
            ! in that case we are done.
          else
            ! a swapping (and sign change) will conclude the job
            b = -c;   c = 0.0_wp
            tmp = cs; cs = -sn; sn = tmp
          endif
        endif
      endif
      r = reshape( (/ cs , -sn ,           &
                      sn ,  cs /) ,        &
                 (/2,2/) , order=(/2,1/) )
    endif

    ! set new matrix elements
    m(1,1) = a ; m(1,2) = b
    m(2,1) = c ; m(2,2) = d

  end subroutine francis_standard_form
 
 end subroutine eigen_francis
 
!-----------------------------------------------------------------------

 !> Given the Schur decomposition, compute the eigenvectors
 !!
 !! This is a Fortran translation of \c
 !! nonsymmv_get_right_eigenvectors, in <tt>nonsymmv.c</tt>. The idea
 !! is simple: first compute the eigenvectors of the triangular matrix
 !! \f$T\f$ and then premultiply by the matrix of the Schur vectors to
 !! get the eigenvectors of the original matrix.
 !!
 !! Some complications arise because we are working with the
 !! <em>real</em> Schur decomposition, so that \f$T\f$ is not
 !! completely triangular.
 !!
 !! Notice that the eigenvectors of a triangular matrix can be
 !! computed as follows: partition the matrix as
 !! \f{displaymath}{
 !!  \left[ \begin{array}{ccc}
 !!   T_{11} & T_{12} & T_{13} \\ & T_{22} & T_{23} \\ & & T_{33}
 !!  \end{array} \right]
 !!  \left[ \begin{array}{c}
 !!   v_{1} \\ v_{2} \\ v_{3}
 !!  \end{array} \right] = 
 !!  \left[ \begin{array}{c}
 !!   \lambda v_{1} \\ \lambda v_{2}\\ \lambda v_{3}
 !!  \end{array} \right],
 !! \f}
 !! then, assuming that \f$\lambda\f$ is a known eigenvalue of
 !! \f$T_{22}\f$, the corresponding eigenvector is obtained by
 !! \f{displaymath}{
 !!  \begin{array}{lll}
 !!   v_{1} & = & \textrm{solution\,\,of\,\,\,\,}(T_{11}-\lambda I)v_1 =
 !!   -T_{12}v_2 \\
 !!   v_{2} & = & \textrm{eigenvector\,\,of\,\,\,\,}T_{22} \\
 !!   v_{3} & = & 0.
 !!  \end{array} 
 !! \f}
 !! This allows to compute all the eigenvectors, by identifying
 !! \f$T_{22}\f$ with one diagonal element (real eigenvalues) or one
 !! diagonal 2-by-2 block (complex eigenvalues) of \f$T\f$. In the
 !! first case \f$v_2\f$ is simply the constant 1 (upon
 !! normalization), in the second case we need the eigenvectors of the
 !! 2-by-2 block.
 !!
 !! The computation of \f$v_1\f$ is straightforward if
 !! \f$T_{11}-\lambda I\f$ is triangular and nonsingular; however,
 !! singular matrices arise in presence of multiple eigenvalues and
 !! 2-by-2 blocks can be present. This causes some technical
 !! complications. Specifically, the backsubstitution is performed
 !! either one line at a time or, for 2-by-2 diagonal blocks in
 !! \f$T_{11}\f$, by working simultaneously on two lines; the
 !! presence of singular blocks is dealt with following the functions
 !! <tt>gsl_schur_solve_equation</tt> (when \f$T_{22}\f$ is 1-by-1)
 !! and <tt>gsl_schur_solve_equation_z</tt> (when \f$T_{22}\f$ is
 !! 2-by-2), both implemented in <tt>schur.c</tt>.
 pure subroutine get_right_eigenvectors(t,z,eval_r,eval_i,evec_r,evec_i)
  real(wp), intent(in)    :: t(:,:) !< Schur matrix
  real(wp), intent(inout) :: z(:,:) !< Schur vectors (overwritten)
  !> Notice that the eigenvalues are reset, to ensure consistent
  !! ordering among eigenvalues and eigenvectors.
  real(wp), intent(out) :: eval_r(:) !< eigenvalues (real)
  real(wp), intent(out) :: eval_i(:) !< eigenvalues (imaginary)
  real(wp), intent(out) :: evec_r(:,:) !< eigenvectors (real)
  real(wp), intent(out) :: evec_i(:,:) !< eigenvectors (imaginary)

  real(wp), parameter :: &
    nonsymmv_smlnum  = 2.0_wp*dbl_min,                       &
    schur_bignum     = (1.0_wp-dbl_epsilon)/nonsymmv_smlnum
  logical :: complex_eigen, complex_pair
  integer :: nd, j, i
  real(wp) :: l_r, l_i ! eigenvalues, real and imaginary parts
  real(wp) :: smlnum, bignum, smin, scale, xnorm, xi(2)
  real(wp) :: normt(size(t,2)) ! column norm of t (excluding diagonal)
  real(wp) :: rhs_x(size(t,1)) ! work array for triangular systems

   nd = size(t,2)
   smlnum = dbl_min*real(nd,wp)/dbl_epsilon
   bignum = (1.0_wp-dbl_epsilon)/smlnum

   ! Compute 1-norm of each column of (strictly) upper triangular part
   ! of t to control overflow in triangular solvers.
   do j=1,nd
     normt(j) = sum(abs(t(1:j-1,j)))
   enddo

   eigen_do: do j=nd,1,-1

     ! Current eigenvalue
     l_r = t(j,j); l_i = 0.0_wp; complex_eigen = .false.
     if(j.gt.1) then
       if(t(j,j-1).ne.0.0_wp) then ! 2-by-2 block
         l_i = sqrt(abs(t(j,j-1)))*sqrt(abs(t(j-1,1)))
         complex_eigen = .true.
       endif
     endif
     smin = max( dbl_epsilon*(abs(l_r)+abs(l_i)) , smlnum )
     smin = max( smin , nonsymmv_smlnum )

     ! Two paths depending on the size of T22
     t22_if: if(l_i.eq.0) then ! 1-by-1 block

       ! Reset eval so that the ordering is consistent among
       ! eigenvalues and eigenvectors
       eval_r(j) = l_r; eval_i(j) = l_i

       ! Back-substitution: notice that the rhs and the solution are
       ! written in the same array rhs_x
       rhs_x(:j-1) = -t(:j-1,j) ! rhs
       rhs_x( j  ) = 1.0_wp     ! solution
       i = j-1
       backsubst_real_do: do
        if(i.lt.1) exit backsubst_real_do

         ! The back-substitution can be made on one line or two lines
         if(i.eq.1) then
           complex_pair = .false.
         else
           complex_pair = t(i,i-1).ne.0.0_wp
         endif
         backsubst_real_cp_if: if(.not.complex_pair) then

           ! xi stores the computed solution before copying back to
           ! rhs_x; this allows various scalings
           call schur_solve_equation( t(i:i,i:i) , l_r , &
                rhs_x(i:i) , xi(1:1) , scale , xnorm , smin )
           ! scale x to avoid overflow
           if(xnorm.gt.1.0_wp) then
             if(rhs_x(i).gt.bignum/xnorm) then
               xi(1) = xi(1)/xnorm
               scale = scale/xnorm
             endif
           endif
           if(scale.ne.1.0_wp) rhs_x(:j) = scale*rhs_x(:j)
           rhs_x(i) = xi(1)
           ! back-substitution: the computed unknown goes to rhs
           if(i.gt.1) rhs_x(:i-1) = rhs_x(:i-1) - xi(1)*t(:i-1,i)

           i = i-1
         else ! backsubst_real_cp_if

           i = i-2
         endif backsubst_real_cp_if
       enddo backsubst_real_do

       ! At this point, rhs_x is an eigenvector of the Schur form T.
       ! To get and eigenvector of the original matrix, we multiply on
       ! the left by the matrix of the Schur vectors.

       ! Notice that, being the eigenvalue real, the eigenvector rhs_x
       ! is also real.

       if(j.gt.1) then ! the first eigenvector is [1 0 0 0 ...]^T
         ! Notice that this is correct since we work for decreasing j,
         ! and each multiplication uses only the first j Schur
         ! vectors. This allows overwriting Z.
         z(:,j) = matmul(z(:,:j),rhs_x(:j))
       endif

       ! Copy back in evec
       evec_r(:,j) = z(:,j)
       evec_i(:,j) = 0.0_wp
       scale = maxval(abs(evec_r(:,j)))
       if(scale.ne.0.0_wp) evec_r(:,j) = (1.0_wp/scale)*evec_r(:,j)

     else ! t22_if

       backsubst_imag_do: do
       enddo backsubst_imag_do

     endif t22_if
   enddo eigen_do
 
 contains

  pure subroutine schur_solve_equation(a,z,b,x,s,xnorm,smin)
   real(wp), intent(in)  :: a(:,:), z, b(:), smin
   real(wp), intent(out) :: x(:), s, xnorm

   real(wp) :: c, cnorm, bnorm, scale

    scale = 1.0_wp
    if(size(a,1).eq.1) then ! 1-by-1 block
      c = a(1,1) - z
      cnorm = abs(c)
      if(cnorm.lt.smin) then
        c = smin
        cnorm = smin
      endif
      bnorm = abs(b(1))
      ! The solution would be  x = b/c  however c can be very small
      if( (cnorm.lt.1.0_wp).and.(bnorm.gt.1.0_wp) &
         .and.(bnorm.gt.schur_bignum*cnorm) ) scale = 1.0_wp/bnorm
      x(1) = b(1)*scale/c
      xnorm = abs(x(1))
    else ! 2-by-2 block
    endif

    s = scale
  end subroutine schur_solve_equation

 end subroutine get_right_eigenvectors

!-----------------------------------------------------------------------
 
 !> Reduce a matrix in Hessenberg form: \f$A=UHU^T\f$, where \f$U\f$ is
 !! orthonormal and \f$H\f$ is upper Hessenberg.
 !!
 !! The algorithm requires \f$n-2\f$ steps, being \f$n\f$ the size of
 !! the matrix. At step \f$j\f$ we need to zero \f$A(j+2:n,j)\f$
 !! (notice that if \f$n\leq2\f$ the matrix is always in Hessenberg
 !! form). More in details, each step works as follows.
 !! <ul>
 !!  <li> Computation of the Householder matrix
 !!  \f{displaymath}{
 !!   P = I-\frac{2}{\|v\|^2}v\,v^T = I-\tau\,\tilde{v}\,\tilde{v}^T
 !!  \f}
 !!  or more precisely of the Householder vector \f$v\f$ and the
 !!  coefficient \f$\tau\f$. This vector is defined by the condition
 !!  that \f$A(1:j,j)\f$ is not modified and \f$A(j+2:n,j)\f$ is
 !!  zeroed. Denoting \f$x = A(:,j)\f$, we have
 !!  \f{displaymath}{
 !!   \left[
 !!   \begin{array}{rcl}
 !!    v(1:j)   & = & 0 \\
 !!    v(j+1)   & = & x(j+1)\pm\|x(j+1:n)\| \\
 !!    v(j+2:n) & = & x(j+2:n)
 !!   \end{array}
 !!   \right].
 !!  \f}
 !!  Following the algorithm of
 !!  <tt>gsl_linalg_householder_transform</tt>, we define a vector
 !!  \f$\tilde{v}\f$ which is obtained by the normalization
 !!  \f$\tilde{v}(j+1)=1\f$, so that
 !!  \f{displaymath}{
 !!   \tilde{v} = \frac{1}{x(j+1)\pm\|x(j+1:n)\|} v.
 !!  \f}
 !!  We then have
 !!  \f{displaymath}{
 !!   \tau = \frac{2}{\|\tilde{v}\|^2} =
 !!   \frac{x(j+1)\pm\|x(j+1:n)\|}{\|x(j+1:n)\|}.
 !!  \f}
 !!  Finally, the sign is chosen as the same sign of \f$x(j+1)\f$.
 !!  <li> Premultiplication by the Householder matrix: in terms of
 !!  columns, we have
 !!  \f{displaymath}{
 !!    \left[PA\right]_{:,k} = A_{:,k} - \tau(v\cdot A_{:,k})v.
 !!  \f}
 !!  There are however two simplifications:
 !!  <ul>
 !!   <li> because \f$v(1:j)=0\f$, \f$A(1:j,:)\f$ is left unchanged
 !!   <li> because \f$A(j+1:n,1:j-1)=0\f$, this part can be also
 !!   ignored.
 !!  </ul>
 !!  Summarizing, the left multiplication have to be carried out only
 !!  on the block \f$A(j+1:n,j:n)\f$.
 !!  <li> Postmultiplication by the Householder matrix: in terms of
 !!  rows, we have
 !!  \f{displaymath}{
 !!    \left[AP\right]_{i,:} = A_{i,:} - \tau(v\cdot A_{i,:})v^T.
 !!  \f}
 !!  There is however one simplifications because \f$v(1:j)=0\f$,
 !!  \f$A(:,1:j)\f$ is left unchanged. Summarizing, the right
 !!  multiplication have to be carried out only on the block
 !!  \f$A(:,j+1:n)\f$.
 !!  <li> Storing the Householder vectors: this is done in two
 !!  separate places:
 !!  <ul>
 !!   <li> the \f$\tau\f$s are stored in a dedicated array
 !!   <li> the vectors \f$\tilde{v}(j+2:n)\f$ are stored in the lower
 !!   part of the Hessenberg matrix
 !!   <li> notice that \f$\tilde{v}(j+1)=1\f$, so this does not need
 !!   to be stored anywhere (see also the comments in \c
 !!   get_householder_vector).
 !!  </ul>
 !! </ul>
 pure subroutine hessenberg_decomp(a,tau)
  real(wp), intent(inout) :: a(:,:)
  real(wp), intent(out)   :: tau(:)
 
  integer :: j, k, i, n
  real(wp) :: tau_j, wj, v(size(a,1))
  character(len=*), parameter :: &
    this_sub_name = 'hessemberg_decomp'

   ! Note that a must be square, because otherwise it can not be left
   ! and right multiplied by the same Householder matrix
   n = size(a,1)
   column_do: do j=1,n-2

     ! 1) Compute the Householder vector
     call get_householder_vector( tau_j , v(j+1:) , a(j+1:,j) )

     ! 2) Premultiply by the Householder matrix
     do k=j,n
       ! Warning: v(j+1) does not hold the correct value, which is
       ! always 1 (see the comments in get_householder_vector)
       wj = a(j+1,k) + dot_product(v(j+2:),a(j+2:,k))
       a(j+1 ,k) = a(j+1 ,k) - tau_j * wj
       a(j+2:,k) = a(j+2:,k) - tau_j * wj * v(j+2:)
     enddo

     ! 3) Postmultiply by the Householder matrix
     do i=1,n
       wj = a(i,j+1) + dot_product(v(j+2:),a(i,j+2:))
       a(i,j+1 ) = a(i,j+1 ) - tau_j * wj
       a(i,j+2:) = a(i,j+2:) - tau_j * wj * v(j+2:)
     enddo

     ! 4) Store the Householder vectors
     ! Note that at this point a(j+2:,j) should be zero, so we can
     ! safely reuse it to store v. One could avoid altogether
     ! computing these elements of a at steps 2 and 3, but this would
     ! result in longer and less readable code.
     tau(j) = tau_j
     a(j+2:,j) = v(j+2:)

   enddo column_do

 end subroutine hessenberg_decomp

!-----------------------------------------------------------------------
  
 !> Compute the Householder vector which annihilates x(2:)
 !!
 !! \warning <tt>v(1)</tt> holds \f$\pm\|x\|\f$, however this value
 !! does not belong to the Householder vector. In fact the
 !! Householder vector is normalized so that \f$v(1)=1\f$ and
 !! <tt>v(1)</tt> is used to store \f$\pm\|x\|\f$ for other purposes.
 pure subroutine get_householder_vector(tau,v,x)
  real(wp), intent(in)  :: x(:)
  real(wp), intent(out) :: tau
  real(wp), intent(out) :: v(:)

  real(wp) :: xnorm, alpha, beta, s
   
   ! First check: if there is only one element ->  nothing to do
   if(size(x).eq.1) then
     tau = 0.0_wp
     v   = 0.0_wp
     return
   endif

   !xnorm = norm2(x(2:))
       ! workaround for FERMI
   xnorm = sqrt(sum(x(2:)**2))

   ! Second check: maybe x is already in the right form
   if(xnorm.eq.0.0_wp) then
     tau = 0.0_wp
     v   = 0.0_wp
     return
   endif

   ! If we are here we have to do some work
   alpha = x(1)
   if(alpha.ge.0.0_wp) then
     beta = -hypot(alpha,xnorm)
   else
     beta =  hypot(alpha,xnorm)
   endif
   tau = (beta-alpha)/beta

   ! Now the vector v
   s = alpha-beta

   if(abs(s).gt.dbl_min) then
     v(1) = beta
     v(2:) = (1.0_wp/s) * x(2:)
   else ! we need to worry about round-off
     v(1) = beta
     v(2:) =   (dbl_epsilon/s)    * x(2:)
     v(2:) = (1.0_wp/dbl_epsilon) * v(2:)
   endif

 end subroutine get_householder_vector
 
!-----------------------------------------------------------------------

 !> Given a collection of Hessenberg vectors \f$v_i\f$ and
 !! corresponding coefficients \f$\tau_i\f$, build the corresponding
 !! orthonormal matrix \f$U\f$ such that
 !! \f{displaymath}{
 !!  U = U_1 \, U_2\, \ldots U_{n-2}
 !! \f}
 !! and
 !! \f{displaymath}{
 !!  U_i = I-\tau_i\,\tilde{v}_i\,\tilde{v}_i^T.
 !! \f}
 !! For further details see <tt>hessenberg_decomp</tt>.
 pure subroutine hessenberg_unpack(u,h,tau)
  !> matrix in Hessenberg form; the Hessenberg vectors must be stored
  !! in the lower part, as described in <tt>hessenberg_decomp</tt>
  real(wp), intent(in) :: h(:,:)
  real(wp), intent(in) :: tau(:) !< Hessenberg coefficients
  real(wp), intent(out):: u(:,:) !< the transformation matrix

  integer :: j, i, n
  real(wp) :: wj

   n = size(h,1)
   
   ! initialize u as the identity matrix
   u = 0.0_wp
   do j=1,n
     u(j,j) = 1.0_wp
   enddo

   ! apply the Ui matrices (right multiplications)
   do j=1,n-2
     do i=1,n
       wj = u(i,j+1) + dot_product(h(j+2:,j),u(i,j+2:))
       u(i,j+1 ) = u(i,j+1 ) - tau(j) * wj
       u(i,j+2:) = u(i,j+2:) - tau(j) * wj * h(j+2:,j)
     enddo
   enddo

 end subroutine hessenberg_unpack

!-----------------------------------------------------------------------

end module mod_eigen

